# Load these libraries:
# 
# library(sf)
# library(mapview)
# library(lubridate)
# library(tidyverse)

# Use classical leaflet/htmlwidgets rendering.
mapviewOptions(fgb = FALSE)
# https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Census-Tracts-2010/5jrd-6zik
# 
# Read in the Chicago 2010 Census tracts.
# Rename the ID column.
# Select only the GEOID column.
# Preview a map.
(geo.chi <- st_read('chicago_tracts_2010.geojson') %>% 
            rename(GEOID = geoid10) %>% 
            select(GEOID)) %>% mapview(legend = F, col.regions = 'deeppink')
## Reading layer `chicago_tracts_2010' from data source `/Users/erin/Documents/NU GDrive/SICSS/2021/Mapping Lightning Talk/chicago_tracts_2010.geojson' using driver `GeoJSON'
## Simple feature collection with 801 features and 9 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -87.94025 ymin: 41.64429 xmax: -87.52366 ymax: 42.02392
## geographic CRS: WGS 84
# https://data.cityofchicago.org/Public-Safety/Crimes-2001-to-Present/ijzp-q8t2/data
# 
# Read in Chicago violent crimes (2020):
#   homicide, robbery, criminal sexual assault, battery, & assault.
# Select necessary columns.
# Change Date from mdy_hms string to ymd date.
# Shorten the CSA description to an abbreviation.
# Drop observations with missing coordinates.
# Convert to spatial points geometry.
# Set CRS.
geo.crimes <- read.csv('ViolentCrime2020.csv') %>% 
              select(ID, Date, Primary.Type, Arrest,
                     Domestic, Longitude, Latitude) %>% 
              mutate(Date = mdy_hms(Date) %>% date(),
                     Primary.Type = ifelse(Primary.Type == 'CRIMINAL SEXUAL ASSAULT',
                                           'CSA',
                                           Primary.Type)) %>% 
              filter(!is.na(Longitude)) %>% 
              st_as_sf(coords = c('Longitude', 'Latitude')) %>%
              st_set_crs(4326)

# Count by crime type.
geo.crimes %>% as.data.frame() %>% group_by(Primary.Type) %>% tally(n = 'Count')
## # A tibble: 5 x 2
##   Primary.Type Count
##   <chr>        <int>
## 1 ASSAULT      18211
## 2 BATTERY      41422
## 3 CSA           1075
## 4 HOMICIDE       789
## 5 ROBBERY       7849
# Glance at a map for August.
geo.crimes %>% filter(month(Date) == 8) %>%
  mapview(cex = .5, lwd = 0,  legend = F) + 
  mapview(geo.chi, alpha.regions = 0, legend = F)
# Read in Cook County population data; take a glimpse.
(df.cook <- read.csv('cook_census.csv')) %>% head()
##         GEOID                                        NAME Population
## 1 17031630200    Census Tract 6302, Cook County, Illinois       1825
## 2 17031580700    Census Tract 5807, Cook County, Illinois       5908
## 3 17031590600    Census Tract 5906, Cook County, Illinois       3419
## 4 17031600700    Census Tract 6007, Cook County, Illinois       2835
## 5 17031611900    Census Tract 6119, Cook County, Illinois       1639
## 6 17031804505 Census Tract 8045.05, Cook County, Illinois       3445
# Merge the population data (county) into Chicago tracts. This will drop the
# data for the tracts outside the city.
# Glance at a polygon map (rename the layer; increase the opacity; add heavier borders).
(geo.chi.acs <- geo.chi %>% merge(df.cook)) %>%
  mapview(zcol = 'Population', layer.name = 'Population', alpha.regions = .65, lwd = .85)
# Use a spatial join to find out which Census tract each point is in.
geo.joined <- st_join(geo.crimes, geo.chi.acs, st_within)

# Tally up violent crimes by type.
# Pivot wider to get one observation per tract and one column for counts of each
# crime type. Fill in NA observations (where there were no such crimes) with zeros.
(df.ct.by_type <- geo.joined %>%
                  as.data.frame() %>%
                  group_by(GEOID, Primary.Type) %>%
                  tally(n = 'Ct') %>% 
                  pivot_wider(GEOID, names_from = Primary.Type, names_prefix = 'Ct.',
                              values_from = Ct, values_fill = 0)) %>% head()
## # A tibble: 6 x 6
## # Groups:   GEOID [6]
##   GEOID       Ct.ASSAULT Ct.BATTERY Ct.CSA Ct.HOMICIDE Ct.ROBBERY
##   <chr>            <int>      <int>  <int>       <int>      <int>
## 1 17031010100         36        120      7           2         14
## 2 17031010201         31         78      0           0         13
## 3 17031010202         45         80      4           0         17
## 4 17031010300         28         57      6           0         16
## 5 17031010400         32         41      5           0          6
## 6 17031010501         10         26      2           0         12
# Tally up all types of violent crimes and merge in the by-type counts.
(df.ct.all <- geo.joined %>%
              as.data.frame() %>%
              group_by(GEOID) %>%
              tally(n = 'Ct.ALL') %>% 
              merge(df.ct.by_type)) %>% head()
##         GEOID Ct.ALL Ct.ASSAULT Ct.BATTERY Ct.CSA Ct.HOMICIDE Ct.ROBBERY
## 1 17031010100    179         36        120      7           2         14
## 2 17031010201    122         31         78      0           0         13
## 3 17031010202    146         45         80      4           0         17
## 4 17031010300    107         28         57      6           0         16
## 5 17031010400     84         32         41      5           0          6
## 6 17031010501     50         10         26      2           0         12
# Merge counts back into geometry.
# Drop observations with empty geometry.
# Look at a count map.
(geo.counts <- geo.chi.acs %>%
               merge(df.ct.all, all = T) %>% 
               filter(!st_is_empty(.))) %>%
  mapview(zcol = 'Ct.ALL', lwd = .75, layer.name = 'All Violent Crimes (Count)')
# Make a dataframe of crime rates for each type of crime.
# First, drop observations with a population of zero (can't do a rate with division by 0).
# Next, mutate across count columns and transform to rate per 100,000 residents.
# Rename the columns appropriately (Rt. for rate instead of Ct. for count).
# Build a map.
(geo.rates <- geo.counts %>%
              filter(Population > 0) %>% 
              mutate(across(starts_with('Ct.'), ~ (. / Population * 100000))) %>%
              rename_with(.fn = ~ str_replace(., 'Ct.', 'Rt.'),
                          .cols = starts_with('Ct.'))) %>%
  mapview(zcol = 'Rt.ALL', layer.name = 'All Violent Crimes per 100,000', lwd = 1)
# Add the crime counts and rates to the same dataframe:
# First, convert geo.rates to a dataframe.
# Then drop the geometry column.
# Finally, merge it *from the right* into geo.counts. Keep all rows on both sides. BUT
# then drop any observations with missing geometry.
geo.crime_measures <- geo.rates %>%
                      as.data.frame() %>%
                      select(-geometry) %>%
                      merge(geo.counts, ., all = T)

# Write the dataset, overwriting any existing file of the same name.
st_write(geo.crime_measures, 'chicago_violent-crime-2020.geojson', delete_dsn = T)
## Deleting source `chicago_violent-crime-2020.geojson' using driver `GeoJSON'
## Writing layer `chicago_violent-crime-2020' to data source `chicago_violent-crime-2020.geojson' using driver `GeoJSON'
## Writing 801 features with 15 fields and geometry type Multi Polygon.